*! v 2.0.6 PR/GA 22apr2003
program define gam, eclass
* 2.0.6/PR: add HUGER option to fit bigger problems (uses gamhug2.exe)
* 2.0.6/GA: fix save `tmp' problems (3 places) by surrounding with compound quotes
* 2.0.5/GA: clear up bug in Stata 8 which merges too many observations (see line 539)
* 2.0.4/PR: limited support for stcox
version 6.0
if replay() {
	if "`e(cmd)'"!="gam" { error 301 }
	preserve
	_gamrslt
	exit
}
global S_E_gam
#delimit ;
syntax varlist(min=1) [if] [in] [aw fw iw] [,
 Family(string) Link(string) DF(string) DEAD(varlist max=1) noMErge
 MIssing(real 9999) BIG Huge HUGER BTolerance(real .0005) LTolerance(real .001) noRefresh];
#delimit cr
/*
	Select executable
 */
if missing("$GAMDIR") { local dir "c:\ado\personal\" }
else local dir $GAMDIR
if !missing("`huger'") {
	local gamprog `dir'gamhug2.exe
	local maxreal 36000
}
else if !missing("`huge'") {
	local gamprog `dir'gamhuge.exe
	local maxreal 6000
}
else if !missing("`big'") {
	local gamprog `dir'gambig.exe
	local maxreal 1000
}
else {
	local gamprog `dir'gamfit.exe
	local maxreal 70
}
cap confirm file `gamprog'
local rc=_rc
errfile `rc' `gamprog'
/*
	Deal with family and link function
*/
local f = lower(trim("`family'"))
local lf = length("`f'")
if missing("`f'") { local fam "gauss" }
else if "`f'"==substr("binomial",1,`lf') { local fam "binom" }
else if "`f'"==substr("gamma",1,max(`lf',3)) { local fam "gamma" }
else if "`f'"==substr("gaussian",1,max(`lf',3)) { local fam "gauss" }
else if "`f'"==substr("poisson",1,`lf') { local fam "poiss" }
else if "`f'"==substr("cox",1,`lf')	{ local fam "cox" }
else if "`f'"==substr("stcox",1,`lf')	{ local fam "stcox" }
else {
	di in red "unknown family() `f'"
	exit 198
}
local li = lower(trim("`link'"))
local lli = length("`li'")
if missing("`li'") {
	if "`fam'"=="gauss" { local l "ident" }
	else if "`fam'"=="binom" { local l "logit" }
	else if "`fam'"=="poiss" { local l "logar" }
	else if "`fam'"=="gamma" { local l "inver" }
	else if "`fam'"=="cox" | "`fam'"=="stcox" { local l "cox" }
}
else if "`li'"==substr("identity",1,max(`lli',3)) { local l "ident" }
else if "`li'"==substr("inverse",1,max(`lli',3)) { local l "inver" }
else if "`li'"==substr("cox",1,`lli') { local l "cox" }
else if "`li'"=="log" { local l "logar" }
else if "`li'"==substr("logit",1,`lli')	{ local l "logit" }
else {
	di in red "invalid link `link'"
	exit 198
}
local interc "no"
if "`fam'"=="cox" {
	if missing("`dead'") {
		di in red "dead() required with cox"
		exit 198
	}
}
else if "`fam'"!="stcox" {
	if !missing("`dead'") {
		di in red "dead() invalid, not Cox model"
		exit 198
	}
	local interc "yes"
}
/*
	Deal with response and predictors
*/
if "`fam'"!="stcox" {
	gettoken y x:varlist
}
else {
	local y _t
	local x `varlist'
	local dead _d
	local fam cox
}
local nx 0
local longn
tokenize `x'
while !missing("`1'") {
	if length("`1'")>6 { local longn "`longn' `1'" }
	local nx = `nx'+1
	local x`nx' `1'
	mac shift
}
if !missing("`longn'") {
	di _n in bl "Length of variable(s)" in ye /*
	*/ "`longn'" in bl " is/are >6 characters."
	di in bl "There may be problems if any name is not unique to 6 chars."
}
if !`nx' {
	di in red "insufficient predictors"
	exit 198
}
/*
	Set up degrees of freedom for smoothers: Default is 1 df (linear)
*/
local d 1
local i 1
while `i'<=`nx' {
	local df`i' `d'
	local i = `i'+1
}
local df_all1 1
if !missing("`df'") {
	tokenize "`df'", parse(",")
	local ncl 0 /* # of comma-delimited clusters */
	while !missing("`1'") {
		if "`1'"!="," {
			local ncl=`ncl'+1
			local clust`ncl' "`1'"
		}
		mac shift
	}
	if `ncl'>`nx' {
		di in red "too many df() values specified"
		exit 198
	}
/*
	Disentangle each varlist:string cluster
*/
	local i 1
	while `i'<=`ncl' {
		tokenize `clust`i'', parse("=:")	
		if "`2'"!=":" & "`2'"!="=" {
			local 3 `1'
			local 1 `x'
			local 2 ":"
		}
		local dfk `3'
		cap confirm num `dfk'
		if _rc {
			di in red "invalid df() value `dfk'"
			exit 198
		}
		if `dfk'<1 {
			di in red "invalid df() value `dfk'"
			exit 198
		}
		if `dfk'>1 { local df_all1 0 }
		unab dfvars:`1'
		tokenize `dfvars'
		while !missing("`1'") {
			local dfv `1'
			local k 0
			local j 1
			while `j'<=`nx' {
				if "`dfv'"=="`x`j''" {
					local k `j'
					local j `nx'
				}
				local j = `j'+1
			}
			if !`k' {
				di in red "`dfv' must be one of the predictors"
				exit 198
			}
			local df`k' `dfk'
			mac shift
		}
		local i = `i'+1
	}
}
if `df_all1' { di in bl "[model is linear, not a GAM---all the df are 1]" }
marksample touse
markout `touse' `dead'
/*
	Weights
*/
qui if !missing("`exp'") {
	tempvar wt
	gen `wt' `exp'
	replace `wt'=. if `wt'<=0
	markout `touse' `wt'
	if "`weight'"=="aweight" {
		sum `wt' if `touse'
		replace `wt' = `wt'/r(mean)
	}
}
/*
	Estimate storage for gam*.exe
*/
local vsize=`nx'+1+!missing("`exp'")	/* 1 is for either constant or dead */
qui count if `touse'
local lworkr=int(.5+r(N)*(`vsize'^.2)/25)
if `lworkr'>`maxreal' {
	di in bl "[Approximate problem size: " `lworkr' "000 reals. " /*
	 */ "Available:" `maxreal' "000 reals.]"
	di in red "problem is too big for `gamprog'"
	exit 2002
}
/*
	Save "index" variable to keep track
	of data order for later merge
*/
quietly {
	sort `x'
	cap drop _index
	gen long _index=_n
/*
	Output data and $.mod files for gamfit.exe
*/
	preserve
	if !missing("`refresh'") {
		cap confirm file $.dat
		if _rc {
			noi di in red "GAM data file $.dat not found"
			exit 601
		}
		cap confirm file $.inx
		if _rc {
			noi di in red "GAM index file $.inx not found"
			exit 601
		}
		local i 0	/* read predictor SDs before est clear */
		while `i'<`nx' {
			local i=`i'+1
			tempname s`i'
			scalar `s`i''=e(gam_s`i')
		}
 	}
	else {
		drop if `touse'==0
		local i 1
		tempvar s
		gen byte `s'=0
		while `i'<=`nx' {
			local xi `x`i''
/*
	Count df for xi (#distinct values)
*/
			sort `xi'
			by `xi': replace `s'=(_n==1 & `xi'!=`missing')
			summ `s', meanonly
			local GAMd`i' `r(sum)'
			if `GAMd`i''<=`df`i'' {
				noi di in red "`df`i'' df for `xi' are too many" /*
				 */ "---only " `GAMd`i'' " distinct values"
				exit 2001
			}
/*
	Standardize each x (including binary predictors)
*/
			sum `xi' if `xi'!=`missing'
			replace `xi'=(`xi'-r(mean))/r(sd) if `xi'!=`missing'
			tempname s`i'
			scalar `s`i''=r(sd) /* sd of predictor */
			local i = `i'+1
		}
		sort _index
		format `y' `x' `wt' `dead' %9.0g	/* to avoid truncation */
		outfile `y' `x' `wt' `dead' using $.dat, comma replace nolabel
		keep _index
		save $.inx, replace
	}
/*
	Create model specification file.
*/
	local i 1
	local model`i'=ltrim("'DATA: ', '$'")
	local i=`i'+1
	local model`i'=ltrim("'P: '," /*
	 */ +string(1+`nx'+("`fam'"=="cox")+!missing("`exp'")))
	local i=`i'+1
	local model`i'=ltrim("'N: ', -1")
	local i=`i'+1
	local model`i'=ltrim("'FORMAT: ', 'free'")
	local i=`i'+1
	local model`i'=ltrim("'MISSING-CODE: ',"+string(`missing'))
	local i=`i'+1
	local model`i'=ltrim("'INTERCEPT: ', `interc'")
	local i=`i'+1
	local model`i'=ltrim("'VARIABLE NAME         MODE           DF'")
	local i=`i'+1
	local model`i'=ltrim("'`y'', 'response', 0")
	local i=`i'+1
	local j 1
	while `j'<=`nx' {
		local model`i'=ltrim("'`x`j''','predictor',"+string(`df`j''))
		local j=`j'+1
		local i=`i'+1
	}
	if !missing("`exp'") {
		local model`i'=ltrim("'`wt'', 'weight', 0")
		local i=`i'+1
	}
	if "`fam'"=="cox" {
		local model`i'=ltrim("'`dead'', 'censoring', 0")
		local i=`i'+1
	}
	local model`i'=ltrim("'FAMILY: ', '`fam''")
	local i=`i'+1
	local model`i'=ltrim("'LINK: ', '`l''")
	local i=`i'+1
/*
	H & T use .001, .001 as tolerances for convergence of local scoring
	and backfitting respectively, while suggesting .0005 for latter in program.
	We use ltolera=.001 (default) and .0005 for greater safety.
	
	17May2002: now adding user option btolerance with default .0005.
*/
	local model`i'=ltrim("'THRESHOLDS: ',"+string(`ltolera')+", "+string(`btolera'))
	local i=`i'+1
/*
	H & T use 20, 15 as max iterations for local scoring and backfitting.
	We use 40, 30 for greater safety.
*/
	local model`i'=ltrim("'MAX ITERS: ', 40, 30")
	local nspec `i'
	drop _all
	set obs `nspec'
	tempvar model
	gen str44 `model'=""
	local blank48 "                                                "
	local i 0
	while `i'<`nspec' {
		local i=`i'+1
		replace `model'=substr("`model`i''`blank48'",1,44) in `i'
	}
	outfile `model' using $.mod, replace noquote
	!`gamprog'
/*
	Read model summary statistics
*/
	drop _all
	cap infile stats using $.out
	local rc=_rc
	errfile `rc' $.out
	tempname fault nobs tdf devpen scale
	scalar `fault'=int(stats[1]+.5)	/* fault code */
	scalar `nobs'=int(stats[2]+.5)	/* number of observations */
	scalar `tdf'=stats[3]		/* error degrees of freedom */
	scalar `devpen'=stats[4]		/* penalized deviance */
	scalar `scale'=stats[5]		/* estimated scale parameter */
/*
	Read fit, residuals and predictors
*/
	drop _all
	if missing("`merge'") {
/*
	Dropping GAM_res and GAM_sres because seem fairly useless.
	Anyway, don't know what GAM_res is for Cox models.
*/
		if ("`fam'"!="cox") {
			cap infile `y' GAM_mu GAM_res GAM_sres `x' using $.fit
			local rc=_rc
			errfile `rc' $.fit
			keep GAM_mu
			label var GAM_mu "GAM fitted values"
			tempfile tmp
			save `"`tmp'"'
		}
/*
	Read individual smooths, confidence bands and partial residuals
*/
		local i 0
		while `i'<`nx' {
			local i=`i'+1
			local vname=substr("`x`i''",1,6)
			drop _all
			cap infile `x`i'' s_`vname' l_`vname' h_`vname'/*
			 */  r_`vname' using `x`i''.gra
			local rc=_rc
			errfile `rc' `x`i''.gra
/*
	SE from confidence band
*/
			gen e_`vname'=(h_`vname'-s_`vname')/1.96
			lab var s_`vname' "GAM `df`i'' df smooth for `x`i''"
			lab var e_`vname' "GAM SE of smooth for `x`i''"
/*
	Cox: Standardize each smooth to mean zero
*/
			if "`fam'"=="cox" {
				sort `x`i''
				drop r_`vname'
				sum s_`vname'
				replace s_`vname'=s_`vname'-r(mean)
			}
			else lab var r_`vname' "GAM partial residual for `x`i''"
			drop `x`i'' h_`vname' l_`vname'
			tempfile tmp`i'
			save `"`tmp`i''"'
		}
		if "`fam'"!="cox" {
			use `tmp', clear
			local i 0
			while `i'<`nx' {
				local i=`i'+1
            			merge using `tmp`i''
            			drop _merge
			}
/*
	Merge with index values
*/
			compress
			merge using $.inx
			drop _merge
			sort _index
			save `"`tmp'"', replace
		}
	}
/*
	Drop variables that might be left over from previous run of gamfit.exe
	& save some estimates
*/
	est clear
	est scalar dev=`devpen'
	est scalar disp=`scale'
	est scalar nobs=`nobs'
	est scalar tdf=`tdf'
	est scalar missing=`missing' /* missing value code */
	est local dead `dead'
	est local depv `y'
	est local fam `fam'
	est local link `l'
	est local vl `x'
	global S_E_dead `dead'	/* double save */
	global S_E_depv `y'
	global S_E_dev=`devpen'
	global S_E_disp=`scale'
	global S_E_fam `fam'
	global S_E_link `l'
	global S_E_nobs=`nobs'
	global S_E_tdf=`tdf'
	global S_E_vl `x'
/*
	Read in summary statistics
*/
	cap drop _all
	cap infile dof slope se z gain pvalue using $.sum
	local rc=_rc
	errfile `rc' $.sum
	local nv1=`nx'+("`fam'"!="cox")
	if `nv1'!=_N { di in red /*
		 */ "inconsistency found when reading GAM results from file $.sum
		exit 2002
	}
	tempname b se
	local i 0
	while `i'<`nv1'{
		local i=`i'+1
		if "`fam'"!="cox" & `i'==`nv1' { 
			local j 0
			local x`nv1' _cons
		}
		else local j `i'
		scalar `b'=slope[`i']
		scalar `se'=se[`i']
		est local gam_x`j' `x`i''
		est scalar gam_df`j'=dof[`i'] 
		est scalar gam_z`j'=z[`i'] 
		est scalar gam_gn`j'=gain[`i']
		est scalar gam_p`j'=pvalue[`i'] 
		if `j' {
			scalar `b'=`b'/`s`i''
			scalar `se'=`se'/`s`i''
			est scalar gam_s`i'=`s`i''
		}
		est scalar gam_sl`j'=`b' /* slope */ 
		est scalar gam_se`j'=`se' /* se(slope) */
	}
	noisily _gamrslt
	restore
	cap drop GAM_mu
	local i 0
	while `i'<`nx' {
		local i=`i'+1
		local vname=substr("`x`i''",1,6)
		cap drop s_`vname'
		cap drop e_`vname'
		cap drop r_`vname'
	}
/*
	Tidy up files left by gam.ado and gam*.exe
*/
	local i 0
	while `i'<`nx' {
		local i=`i'+1
		erase `x`i''.gra
	}
	erase $.mod
	erase $.fit
	erase $.out
	erase $.sum
}
qui if missing("`merge'") {
/*
	Merge existing data with data from gamfit run
*/
	if "`fam'"!="cox" {
		sort _index
		merge _index using `tmp'
		drop _merge
	}
	else {
		gen GAM_mu=0
		tempvar nouse ximpute
		gen byte `nouse'=1-`touse'
		gen `ximpute'=0
		local i 0
		while `i'<`nx' {
			local i=`i'+1
			count if `x`i''==`missing'
			if r(N) {
				replace `ximpute'=`x`i''
				summ `x`i'' if `x`i''!=`missing'	
				replace `ximpute'=r(mean) if `x`i''==`missing'
				sort `nouse' `ximpute'
			}
			else sort `nouse' `x`i''
			merge using `tmp`i''
			drop _merge
			local vname=substr("`x`i''",1,6)
			replace GAM_mu=GAM_mu+s_`vname'
		}			
	}
	drop if missing(_index) 
	count
	noi di _n in ye r(N) in gr " records merged."
}
cap drop _index
est local cmd gam
global S_E_cmd gam	/* double save */
end

program define _gamrslt, eclass
* GAM results
local flo "family `e(fam)', link `e(link)'."
di _n in gr "Generalized Additive Model with `flo'"
#delimit ;
di _n in gr "Model df     = " in ye %9.3f e(nobs)-e(tdf)
	_col(49) in gr "No. of obs = "  in ye %9.0g e(nobs) ;
di in gr "Deviance     = "  in ye %9.0g e(dev)
	_col(49) in gr "Dispersion = "  in ye %9.0g e(disp) ;
#delimit cr
local skip=9-length("`e(depv)'")
di in gr "----------+" _dup(59) "-"
di in gr _skip(`skip') /*
 */ "`e(depv)' |   df    Lin. Coef.  Std. Err.      z        Gain    P>Gain"
di in gr "----------+" _dup(59) "-"
tempname tg totdf b se
scalar `tg'=0
scalar `totdf'=0
local nx: word count `e(vl)'
local nv1=`nx'+("`e(fam)'"!="cox")
local i 0
while `i'<`nv1' {
	local i=`i'+1
	if "`e(fam)'"!="cox" & `i'==`nv1' { local i 0 }
	local v `e(gam_x`i')'
	local skip=9-length("`v'")
	if abs(e(gam_df`i')-1)<1e-6 {
		local fmt %4.0f _skip(3)
	}
	else {
		local fmt %7.3f
		scalar `tg'=`tg'+e(gam_gn`i')
		scalar `totdf'=`totdf'+e(gam_df`i')-1
	}
	local name=substr("`v'",1,6)
	di in gr _skip(`skip') "`v' |" /*
	 */ in ye `fmt' e(gam_df`i') "  " /*
	 */ in ye %9.0g e(gam_sl`i') "  " /*
	 */ in ye %9.0g e(gam_se`i') " " /*
	 */ in ye %9.3f e(gam_z`i') " " /*
	 */ in ye %9.3f e(gam_gn`i') " " /*
	 */ in ye %9.4f e(gam_p`i')
	 if !`i' { local i `nv1' }
}
di in gr _dup(70) "-"
di in gr "Total gain (nonlinearity chisquare) = " in ye %9.3f `tg' /*
 */ in gr " (" in ye %5.3f `totdf' in gr " df), P = " /*
 */ in ye %6.4f chiprob(`totdf',`tg')
end

program define errfile
* 1=rc, 2=file giving problems.
if `1' {
	noi di in red "GAMFIT failure, `2' not found"
	exit `rc'
}
end
